欢迎关注“小丫画图”公众号,同名知识星球等你加入

小丫微信: epigenomics E-mail: figureya@126.com

作者:LiYin

小丫编辑校验

需求描述

用R复现文章里的图

出自https://onlinelibrary.wiley.com/doi/abs/10.1111/all.13222

应用场景

visualizing genomic variations, including mutation patterns, copy number variations (CNVs), expression patterns, and methylation patterns.

支持hg18、hg19、mm9、mm10

例如像例文那样展示多个芯片基因的表达变化和之间的link关系

参考资料:https://journals.sagepub.com/doi/10.4137/CIN.S13495

https://www.bioconductor.org/packages/devel/bioc/html/OmicCircos.html

环境设置

使用国内镜像安装包

options("repos"= c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
options(BioC_mirror="http://mirrors.ustc.edu.cn/bioc/")
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("OmicCircos", version = "3.8")

加载包

library(S4Vectors)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colMeans, colnames, colSums, dirname, do.call, duplicated,
##     eval, evalq, Filter, Find, get, grep, grepl, intersect,
##     is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
##     paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
##     Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
library(tidyverse)
## ── Attaching packages ────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.3.0
## ✔ tibble  2.0.1     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.3.1     ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## ── Conflicts ───────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine()    masks BiocGenerics::combine()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ dplyr::rename()     masks S4Vectors::rename()
library(OmicCircos)
## Loading required package: GenomicRanges
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomeInfoDb
Sys.setenv(LANGUAGE = "en") #显示英文报错信息
options(stringsAsFactors = FALSE) #禁止chr转成factor

输入数据

如果你的数据已经保存为easy_input.csv格式,就可以跳过这步,进入“开始画图”

gene.location <- read.csv("gene.location.csv")
head(gene.location)
##   chromosome_name start_position end_position strand ensembl_gene_id
## 1              13       23551994     23552136     -1 ENSG00000223116
## 2              13       23708313     23708703      1 ENSG00000233440
## 3              13       23726725     23726825     -1 ENSG00000207157
## 4              13       23743974     23744736     -1 ENSG00000229483
## 5              13       23791571     23791673     -1 ENSG00000252952
## 6              13       23817659     23821323      1 ENSG00000235205
##   entrezgene external_gene_id
## 1         NA       AL157931.1
## 2         NA          HMGA1P6
## 3         NA           RNY3P4
## 4         NA        LINC00362
## 5  100873766         RNU6-58P
## 6         NA         TATDN2P3
easy_input <- read.csv('signal.csv')
head(easy_input)
##         id color group1_s1 group1_s2 group2_s1 group2_s2 group2_s3
## 1 PPAPDC1A black     1.639     0.214    -0.377    -3.097     1.393
## 2     SHC4 black     1.631    -0.505    -0.651    -0.973    -0.048
## 3   ZNF552 black     0.240    -1.252    -1.879    -0.195    -1.691
## 4  IL12RB2 black    -0.141     2.780     2.338     1.308     0.415
## 5     ABAT black    -0.981    -0.896    -2.019     1.412    -1.366
## 6     WWP1 black    -1.345    -0.620     0.389     0.301    -0.321
##   group3_s1 group3_s2 group3_s3
## 1     0.911     1.320    -0.154
## 2    -0.666     0.503    -0.349
## 3    -0.012    -1.133     0.354
## 4     1.321     0.025     2.385
## 5    -1.994    -0.636    -0.885
## 6    -0.900    -1.251    -0.264
# 提取基因在染色体上的位置
gene_list <- easy_input$id
# 此处根据gene symbol提取位置信息
gene_list_chr <- gene.location[gene.location$external_gene_id %in% gene_list,]
# 只保留三列:染色体、起始位置、gene symbol
gene_list_chr <- gene_list_chr[,c(1,2,7)]
colnames(gene_list_chr) <- c('chr','position','id')
head(gene_list_chr)
##      chr  position      id
## 3145   9   2621834   VLDLR
## 4198  21  31586324   CLDN8
## 5908   7 142829170     PIP
## 6715   6 137464968 IL22RA2
## 7146  15  28000021    OCA2
## 8293  17  37366789   STAC2
# 合并染色体位置和signal
gene_chr_fc <- merge(gene_list_chr, easy_input, by='id')
gene_chr_fc <- gene_chr_fc[!duplicated(gene_chr_fc$id),]

write.csv(gene_chr_fc, "easy_input.csv", row.names = F)

分别准备图中各部分所需的数据

热图

gene_chr_fc <- read.csv("easy_input.csv")
head(gene_chr_fc)
##       id chr  position color group1_s1 group1_s2 group2_s1 group2_s2
## 1   ABAT  16   8768422 black    -0.981    -0.896    -2.019     1.412
## 2  ABCC8  11  17414432 black    -2.030     3.128    -3.019     2.328
## 3   AFF3   2 100162323 black    -1.376     1.755     1.066     1.073
## 4  AGBL2  11  47681143 black    -2.572    -0.516    -1.120     2.709
## 5  AGTR1   3 148415571 black     0.160     2.922    -0.966    -2.609
## 6 AKR7A3   1  19609052 black    -1.560     1.790    -1.542     1.223
##   group2_s3 group3_s1 group3_s2 group3_s3
## 1    -1.366    -1.994    -0.636    -0.885
## 2     2.058    -2.613     0.514    -2.572
## 3    -1.844    -0.392    -0.842    -0.760
## 4    -0.210    -1.800    -0.300    -0.709
## 5    -0.456    -2.007    -0.802    -2.035
## 6     1.252     0.743     2.022    -0.871
# 画三组样品的热图,分3层来
# 若不分3层,热图则会挨在一起,无法分开
exp1 <- gene_chr_fc[,c(2,3,1,5,6)]
head(exp1)
##   chr  position     id group1_s1 group1_s2
## 1  16   8768422   ABAT    -0.981    -0.896
## 2  11  17414432  ABCC8    -2.030     3.128
## 3   2 100162323   AFF3    -1.376     1.755
## 4  11  47681143  AGBL2    -2.572    -0.516
## 5   3 148415571  AGTR1     0.160     2.922
## 6   1  19609052 AKR7A3    -1.560     1.790
exp2 <- gene_chr_fc[,c(2,3,1,7,8,9)]
exp3 <- gene_chr_fc[,c(2,3,1,10,11,12)]

连线

处理基因名字为一一对应关系,红色连红色,蓝色连蓝色

gene_label <- gene_chr_fc[, c(2,3,1,4)]

# 按照1-22,x,y排序
# 有的数据没有X或Y,小鼠只有1:20也没关系,不用改
gene_label$chr <- factor(gene_label$chr, levels = c(1:22, "X", "Y"))
# 下面去掉多余的factor。也可以不运行这行,对结果没影响
gene_label$chr <- droplevels(gene_label$chr, exclude = if(anyNA(levels(gene_label$chr))) NULL else NA)
# 排序
gene_label <- gene_label[order(gene_label$chr),]
tail(gene_label)
##     chr position       id color
## 106  21 35885440    RCAN1 black
## 109  21 43159529    RIPK4 black
## 7    22 39348746 APOBEC3A black
## 127  22 29190543     XBP1 black
## 37    X 13587724    EGFL6   red
## 48    X 13789150    GPM6B   red
# 红色基因
gene_link1 <- filter(gene_label, gene_label$color == 'red')
gene_link1 <- gene_link1[, -4]
head(gene_link1)
##   chr  position       id
## 1   3 128628709 KIAA1257
## 2   6  19837617      ID4
## 3  11  72465774  STARD10
## 4   X  13587724    EGFL6
## 5   X  13789150    GPM6B
id11 <- data_frame(id = combn(gene_link1$id, 2)[1, ])
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
head(id11)
## # A tibble: 6 x 1
##   id      
##   <chr>   
## 1 KIAA1257
## 2 KIAA1257
## 3 KIAA1257
## 4 KIAA1257
## 5 ID4     
## 6 ID4
id1.1 <- data_frame(id1.1 = combn(gene_link1$id, 2)[2, ])
genelink1 <- gene_link1 %>%
  right_join(id11)
## Joining, by = "id"
genelink11 <- gene_link1 %>%
  rename(id1.1=id) %>%
  right_join(id1.1)
## Joining, by = "id1.1"
gene_link1 <- cbind(genelink1, genelink11)

# 蓝色基因
gene_link2 <- filter(gene_label, gene_label$color == 'blue')
gene_link2 <- gene_link2[, -4]
id22 <- data_frame(id=combn(gene_link2$id, 2)[1, ])
id2.1<- data_frame(id2.1=combn(gene_link2$id, 2)[2, ])
genelink2 <- gene_link2 %>%
  right_join(id22)
## Joining, by = "id"
genelink22 <- gene_link2 %>%
  rename(id2.1=id) %>%
  right_join(id2.1)
## Joining, by = "id2.1"
gene_link2 <- cbind(genelink2, genelink22)

开始画图

R代表半径位置,W是宽度,cir是染色体

图中的三组样品的文字需要用AI进行标记

包里自带的写染色体名字的位置不合理,也没有参数可以调整,这里提供两种方法来处理:

方法一:单独画染色体名字

pdf('OmicCircos.pdf',width = 8,height = 8)
par(mar=c(0, 0, 0, 0)); #准备画板
plot(c(1,800), c(1,800), type="n", axes=FALSE, xlab="", ylab="", main="");

#画染色体scale
circos(R=250, cir="hg19", W=5, type="chr", 
       print.chr.lab=FALSE, #自带的名字会跟scale重叠
       scale=TRUE) 

#画染色体名字
#染色体长度信息,从https://genome.ucsc.edu/goldenPath/help/hg19.chrom.sizes下载,然后整理而成
chr_info <- read.table("hg19.chrom.sizes.txt")
head(chr_info)
##   V1        V2
## 1  1 249250621
## 2  2 243199373
## 3  3 198022430
## 4  4 191154276
## 5  5 180915260
## 6  6 171115067
chr_label <- data.frame(chr = chr_info$V1, position = (chr_info$V2 + 1)/2, id = chr_info$V1)
circos(R=270, cir="hg19", W=2, mapping=chr_label, type="label2", col = "black", side="in", cex=0.5);

#画基因名字,side="in"则基因在圈里面
circos(R=280, cir="hg19", W=10, mapping=gene_label, type="label", lwd = 0.4, col = gene_label$color, 
       side="out", cex=0.5);  

#画第1个热图
circos(R=210, cir="hg19", W=40, mapping=exp1,  type="heatmap2",   
       cluster=FALSE, col.bar=FALSE, col=NULL,scale = TRUE) 
## Warning in circos(R = 210, cir = "hg19", W = 40, mapping = exp1, type =
## "heatmap2", : NAs introduced by coercion
#画第2个热图
circos(R=160, cir="hg19", W=60, mapping=exp2,  type="heatmap2",
       cluster=FALSE, col.bar=FALSE, col=NULL,scale = TRUE) 
## Warning in circos(R = 160, cir = "hg19", W = 60, mapping = exp2, type =
## "heatmap2", : NAs introduced by coercion
#画第3个热图
circos(R=110, cir="hg19", W=60, mapping=exp3,  type="heatmap2",
       cluster=FALSE, col.bar=FALSE, col=NULL,scale = TRUE) 
## Warning in circos(R = 110, cir = "hg19", W = 60, mapping = exp3, type =
## "heatmap2", : NAs introduced by coercion
#画link
circos(R=100, cir="hg19", W=10, mapping=gene_link2, type="link2", lwd=2, col='blue') 
circos(R=100, cir="hg19", W=10, mapping=gene_link1, type="link2", lwd=2, col='red')
dev.off()
## quartz_off_screen 
##                 2

方法二:修改包里的代码

修改了染色体名字的位置,顺便修改了scale的位置。

修改后的函数命名为circos_plus,保存为sourceOmiccircos.R,位于当前文件夹。查找#Ya#可看到修改的代码。

source("sourceOmiccircos.R")
pdf('OmicCircos_plus.pdf',width = 8,height = 8)
par(mar=c(0, 0, 0, 0)); #准备画板
plot(c(1,800), c(1,800), type="n", axes=FALSE, xlab="", ylab="", main="");

#画染色体
circos_plus(R=250, cir="hg19", W=5, type="chr", 
       print.chr.lab=TRUE, scale=TRUE) 

#画基因名字,side="in"则基因在圈里面
circos_plus(R=280, cir="hg19", W=10, mapping=gene_label, type="label", lwd = 0.4, col = gene_label$color, 
       side="out", cex=0.5);  

#画第1个热图
circos_plus(R=210, cir="hg19", W=40, mapping=exp1,  type="heatmap2",   
       cluster=FALSE, col.bar=FALSE, col=NULL,scale = TRUE) 
## Warning in circos_plus(R = 210, cir = "hg19", W = 40, mapping = exp1, type
## = "heatmap2", : NAs introduced by coercion
#画第2个热图
circos_plus(R=160, cir="hg19", W=60, mapping=exp2,  type="heatmap2",
       cluster=FALSE, col.bar=FALSE, col=NULL,scale = TRUE) 
## Warning in circos_plus(R = 160, cir = "hg19", W = 60, mapping = exp2, type
## = "heatmap2", : NAs introduced by coercion
#画第3个热图
circos_plus(R=110, cir="hg19", W=60, mapping=exp3,  type="heatmap2",
       cluster=FALSE, col.bar=FALSE, col=NULL,scale = TRUE) 
## Warning in circos_plus(R = 110, cir = "hg19", W = 60, mapping = exp3, type
## = "heatmap2", : NAs introduced by coercion
#画link,col跟基因颜色一致
circos_plus(R=100, cir="hg19", W=10, mapping=gene_link2, type="link2", lwd=2, col='blue') 
circos_plus(R=100, cir="hg19", W=10, mapping=gene_link1, type="link2", lwd=2, col='red')
dev.off()
## quartz_off_screen 
##                 2
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.2
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2       OmicCircos_1.20.0    GenomicRanges_1.34.0
##  [4] GenomeInfoDb_1.18.1  IRanges_2.16.0       forcats_0.3.0       
##  [7] stringr_1.3.1        dplyr_0.7.8          purrr_0.3.0         
## [10] readr_1.3.1          tidyr_0.8.2          tibble_2.0.1        
## [13] ggplot2_3.1.0        tidyverse_1.2.1      S4Vectors_0.20.1    
## [16] BiocGenerics_0.28.0 
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5       xfun_0.4               haven_2.0.0           
##  [4] lattice_0.20-38        colorspace_1.4-0       generics_0.0.2        
##  [7] htmltools_0.3.6        yaml_2.2.0             utf8_1.1.4            
## [10] rlang_0.3.1            pillar_1.3.1           glue_1.3.0            
## [13] withr_2.1.2            modelr_0.1.3           readxl_1.2.0          
## [16] GenomeInfoDbData_1.2.0 bindr_0.1.1            plyr_1.8.4            
## [19] zlibbioc_1.28.0        munsell_0.5.0          gtable_0.2.0          
## [22] cellranger_1.1.0       rvest_0.3.2            evaluate_0.12         
## [25] knitr_1.21             fansi_0.4.0            broom_0.5.1           
## [28] Rcpp_1.0.0             scales_1.0.0           backports_1.1.3       
## [31] XVector_0.22.0         jsonlite_1.6           hms_0.4.2             
## [34] digest_0.6.18          stringi_1.2.4          grid_3.5.1            
## [37] bitops_1.0-6           cli_1.0.1              tools_3.5.1           
## [40] magrittr_1.5           RCurl_1.95-4.11        lazyeval_0.2.1        
## [43] crayon_1.3.4           pkgconfig_2.0.2        xml2_1.2.0            
## [46] lubridate_1.7.4        assertthat_0.2.0       rmarkdown_1.11        
## [49] httr_1.4.0             rstudioapi_0.9.0       R6_2.3.0              
## [52] nlme_3.1-137           compiler_3.5.1